home *** CD-ROM | disk | FTP | other *** search
Text File | 2000-10-06 | 6.7 KB | 369 lines | [TEXT/CWIE] |
- ///--------------------------------------------------------------------------------------
- // BlitPixieBlitClear - cache-optimized screen blitter
- //
- // written by Anders F Björklund <afb@algonet.se>
- // ©2000 afb.
- ///--------------------------------------------------------------------------------------
-
- #ifndef __BLITPIXIE__
- #include "BlitPixieHeader.h"
- #endif
-
- #include "BlitPixieAsm.h"
-
- #pragma mark *** PowerPC asm :
- #if USE_PPC_ASSEMBLY
-
- // NOTE: assumes dstRowBytes is multiple of 4 (for alignment purposes)
- // NOTE: assumes bytes, rows > 0
-
- ASM_FUNC void BlitPixieBlitClear(
- register unsigned char *src, // r3
- register unsigned char *dst, // r4
- register unsigned long color, // r5
- register unsigned long srcRowBytes, // r6
- register unsigned long dstRowBytes, // r7
- register unsigned short bytes, // r8
- register unsigned short rows ) // r9
- {
- #define r_src r3
- #define r_dst r4
- #define r_color r5
- #define r_srcRowBytes r6
- #define r_dstRowBytes r7
- #define r_width r8
- #define r_height r9
-
- #define r_srcStride r31
- #define r_dstStride r30
- #define r_bytes r29
- #define r_blocks r28
- #define r_y r27
-
- #define kRegisterSaveStack (5 * 4)
-
- ASM_BEGIN
- stmw r27,-kRegisterSaveStack(SP)
-
- stw r_color,-(kRegisterSaveStack+8)(SP)
- stw r_color,-(kRegisterSaveStack+4)(SP)
-
- mr r_bytes,r_width
- mr r_y,r_height
-
- sub r_srcStride,r_srcRowBytes,r_width
- sub r_dstStride,r_dstRowBytes,r_width
-
- neg r0,r4
- rlwinm r0,r0,0,27,31
- sub r_bytes,r_bytes,r0
-
- subi r_src,r_src,8
- subi r_dst,r_dst,8
-
- lfd fp0,-(kRegisterSaveStack+8)(SP)
-
- rlwinm. r_blocks,r_bytes,27,5,31
- rlwinm r_bytes,r_bytes,0,27,31
-
- #define FLAG_BLOCKS 20
- // #define FLAG_FREE 21 // note: free for use
-
- #define FLAG_PRE1 26
- #define FLAG_PRE2 25
- #define FLAG_PRE4 24
- #define FLAG_PRE8 23
- #define FLAG_PRE16 22
-
- #define FLAG_POST1 31
- #define FLAG_POST2 30
- #define FLAG_POST4 29
- #define FLAG_POST8 28
- #define FLAG_POST16 27
-
- rlwinm r0,r0,27-FLAG_PRE16,FLAG_PRE16,FLAG_PRE1
- rlwimi r0,r_bytes,27-FLAG_POST16,FLAG_POST16,FLAG_POST1
- mtcrf 0x07,r0 // cr5 | cr6 | cr7
- crnor FLAG_BLOCKS,0*CR_NO + CR_EQ,0*CR_NO + CR_EQ
-
- @rowloop:
- mtctr r_blocks
-
- // copy pre-block
- bc IF_NOT,FLAG_PRE1,@skip_pre1
-
- lbz r0,8(r3)
- addi r3,r3,1
- stb r0,8(r4)
- addi r4,r4,1
- stb r_color,7(r3)
-
- @skip_pre1:
- bc IF_NOT,FLAG_PRE2,@skip_pre2
-
- lhz r0,8(r3)
- addi r3,r3,2
- sth r0,8(r4)
- addi r4,r4,2
- sth r_color,6(r3)
-
- @skip_pre2:
- bc IF_NOT,FLAG_PRE4,@skip_pre4
-
- lwz r0,8(r3)
- addi r3,r3,4
- stw r0,8(r4)
- addi r4,r4,4
- stw r_color,4(r3)
-
- @skip_pre4:
- bc IF_NOT,FLAG_PRE8,@skip_pre8
-
- lfd fp1,8(r3)
- addi r3,r3,8
- stfd fp1,8(r4)
- addi r4,r4,8
- stfd fp0,0(r3)
-
- @skip_pre8:
- bc IF_NOT,FLAG_PRE16,@skip_pre16
-
- lfd fp1,8(r3)
- lfd fp2,16(r3)
- addi r3,r3,16
- stfd fp1,8(r4)
- stfd fp2,16(r4)
- addi r4,r4,16
- stfd fp0,-8(r3)
- stfd fp0,0(r3)
-
- @skip_pre16:
-
- // copy blocks
- bc IF_NOT,FLAG_BLOCKS,@skipblockloop
- li r0,8
-
- @blockloop:
- lfd fp1,8(r3)
- lfd fp2,16(r3)
- lfd fp3,24(r3)
- lfd fp4,32(r3)
-
- stfd fp1,8(r4)
- stfd fp2,16(r4)
- stfd fp3,24(r4)
- stfdu fp4,32(r4)
-
- stfd fp0,8(r3)
- stfd fp0,16(r3)
- stfd fp0,24(r3)
- stfdu fp0,32(r3)
-
- bdnz @blockloop
- @skipblockloop:
-
- subic. r_y,r_y,1
-
- // copy post-block
- bc IF_NOT,FLAG_POST16,@skip_post16
-
- lfd fp1,8(r3)
- lfd fp2,16(r3)
- addi r3,r3,16
- stfd fp1,8(r4)
- stfd fp2,16(r4)
- addi r4,r4,16
- stfd fp0,-8(r3)
- stfd fp0,0(r3)
-
- @skip_post16:
- bc IF_NOT,FLAG_POST8,@skip_post8
-
- lfd fp0,8(r3)
- addi r3,r3,8
- stfd fp0,8(r4)
- addi r4,r4,8
- stfd fp0,0(r3)
-
- @skip_post8:
- bc IF_NOT,FLAG_POST4,@skip_post4
-
- lwz r0,8(r3)
- addi r3,r3,4
- stw r0,8(r4)
- addi r4,r4,4
- stw r_color,4(r3)
-
- @skip_post4:
- bc IF_NOT,FLAG_POST2,@skip_post2
-
- lhz r0,8(r3)
- addi r3,r3,2
- sth r0,8(r4)
- addi r4,r4,2
- sth r_color,6(r3)
-
- @skip_post2:
- bc IF_NOT,FLAG_POST1,@skip_post1
-
- lbz r0,8(r3)
- addi r3,r3,1
- stb r0,8(r4)
- addi r4,r4,1
- stb r_color,7(r3)
-
- @skip_post1:
-
- add r3,r3,r_srcStride
- add r4,r4,r_dstStride
-
- bne @rowloop
-
- lmw r27,-kRegisterSaveStack(SP)
- ASM_END
- }
-
- #pragma mark *** 680x0 asm :
- #elif USE_68K_ASSEMBLY
-
- ASM_FUNC void BlitPixieBlitClear(
- unsigned char *src,
- unsigned char *dst,
- unsigned long color,
- unsigned long srcRowBytes,
- unsigned long dstRowBytes,
- unsigned short bytes,
- unsigned short rows)
- {
- #define D_color D2
- #define D_bytes D3
- #define D_rows D4
- #define D_srcBytes D5
- #define D_dstBytes D6
-
- ASM_BEGIN
-
- MOVEM.L D3-D6/A2,-(SP)
-
- MOVE.L src,A0
- MOVE.L dst,A1
- MOVE.L color,D_color
- MOVE.L srcRowBytes,D_srcBytes
- MOVE.L dstRowBytes,D_dstBytes
- MOVE.W bytes,D_bytes
- MOVE.W rows,D_rows
-
- EXT.L D_bytes
- SUB.L D_bytes,D_srcBytes
- SUB.L D_bytes,D_dstBytes
-
- // *** LOOP SETUP ***
- MOVEQ #15,D0
- CLR.L D1
-
- MOVE.W D_bytes,D1
- LSR.W #2,D1 // / sizeof(long)
- AND.W D0,D1
- LSR.W #2,D1 // * sizeof(MOVE.L (A0),(A1)+; MOVE.L D_color,(A0)+)
- LEA @loopend,A2
- SUBA.L D1,A2
-
- MOVE.W D_bytes,D1
- LSR.W #6,D1
-
- // *** COPY ***
-
- @rowloop:
-
- // align to word boundary
- // main word copy loop
- MOVE.W D1,D0
- JMP (A2)
- @loopstart:
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- MOVE.L (A0),(A1)+
- MOVE.L D_color,(A0)+
- @loopend:
- DBRA D0,@loopstart
-
- // do left-overs
- MOVE.W D_bytes,D0
- ANDI.W #2,D0
- BEQ.S @restword
- MOVE.W (A0),(A1)+
- MOVE.W D_color,(A0)+
- @restword:
- MOVE.W D_bytes,D0
- ANDI.W #1,D0
- BEQ.S @restbyte
- MOVE.B (A0),(A1)+
- MOVE.B D_color,(A0)+
- @restbyte:
-
- ADDA.L D_srcBytes,A0
- ADDA.L D_dstBytes,A1
-
- SUBQ.W #1,D_rows
- BNE.S @rowloop
-
- MOVEM.L (SP)+,D3-D6/A2
-
- ASM_END
- }
-
- #pragma mark *** Generic C :
- #elif USE_GENERIC_C
-
// BlitPixieBlitClear (generic C) - for each of 'rows' rows, copies 'bytes'
// bytes from src to dst with BlitPixieMemCopy and then hands the same source
// span to BlitPixieMemSet together with 'color'. After every row the two
// pointers advance by srcRowBytes and dstRowBytes respectively.
void BlitPixieBlitClear(
	unsigned char *src,
	unsigned char *dst,
	unsigned long color,
	unsigned long srcRowBytes,
	unsigned long dstRowBytes,
	unsigned short bytes,
	unsigned short rows)
{
	unsigned short rowsLeft;

	BLITPIXIE_ASSERT(rows > 0 );
	BLITPIXIE_ASSERT(bytes > 0 );

	for (rowsLeft = rows; rowsLeft != 0; rowsLeft--)
	{
		// move the row out first, then wipe what was just copied
		BlitPixieMemCopy( dst, src, bytes );
		BlitPixieMemSet( src, color, bytes );

		dst += dstRowBytes;
		src += srcRowBytes;
	}
}
-
- #endif // GENERATING…
-
-